#### SIMULATIONS TO COMPARE WHETHER INVESTING IN A DLE VS STANDARD LE IS WORTH IT
#### ASSUME IMPLEMENTING DLE COSTS YOU SOME OF YOUR SAMPLE AND VARY COST

#### Packages ####
library(tidyverse) # data manipulation
library(data.table) # high performance data.frame operations
library(DeclareDesign) # simulation pipeline
library(future) # parallel processing
library(here) # relative path management

#### EXPANDABLE DESIGN ####
#### Parameters ####
## N = sample size
## rho = correlation between baseline lists
## cost = sample loss rate (random)
designer = function(N, rho, cost){
  # MODEL
  model = declare_model(
    N = N,
    count_a = rbinom(N, size = 4, prob = 0.5),
    count_b = correlate(
      given = count_a,
      rho = rho, # rank correlations between lists
      rbinom,
      size = 4,
      prob = 0.5),
    # X is whether they have sensitive trait
    # Based on Blair et al 2020
    # a single LE would not detect this effect size
    # but a DLE would have enough power at 80%
    X = rbinom(N, size = 1, prob = 0.15),
    # R indicates whether they remain under DLE
    R = rbinom(N, 1, prob  = 1 - cost),
    potential_outcomes(Y_a ~ X * Z_a + count_a,
                       conditions = list(Z_a = c(0,1))),
    potential_outcomes(Y_b ~ X * Z_b + count_b,
                       conditions = list(Z_b = c(0,1)))
  )
  
  # INQUIRY
  inquiry = declare_inquiry(prevalence_rate = mean(X))
  
  # DATA STRATEGY
  assign = declare_assignment(Z_a = complete_ra(N),
                              Z_b = 1 - Z_a)
  
  measurement = declare_measurement(Y_a = reveal_outcomes(Y_a ~ Z_a),
                                    Y_b = reveal_outcomes(Y_b ~ Z_b))

  # ANSWER STRATEGY
  single_a = declare_estimator(Y_a ~ Z_a, inquiry = "prevalence_rate", 
                               label = "List A")
  
  single_b = declare_estimator(Y_b ~ Z_b, inquiry = "prevalence_rate",
                               label = "List B")
  
  dle_handler = declare_step(handler = function(data){
    data %>% 
      pivot_longer(cols = c(Z_a, Z_b, Y_a, Y_b)) %>% 
      separate(name, into = c("variable", "list")) %>% 
      pivot_wider(names_from = "variable", values_from = "value")}
  )
  
  dle = declare_estimator(Y ~ Z + list, clusters = ID, se_type = "stata",
                          subset = R == 1, inquiry = "prevalence_rate", 
                          label = "DLE")
  
  model + inquiry + assign + measurement + single_a + single_b +
    dle_handler + dle
}

designs = expand_design(designer,
                        N = 1000, # sample size
                        rho =  c(0, 0.4, 0.8), # correlation between baseline lists
                        cost = seq(0, 0.5, by = 0.1),
                        expand = TRUE)

#### SIMULATE ####
plan(multicore) # ignored in Windows or RStudio
set.seed(20220812)
cost_sim = simulate_design(designs, sims = 1000)
plan(sequential)

head(cost_sim)

#### SAVE ####
save(cost_sim, file = here("cost_sim.rda"))



